# Trainer overrides for a short fine-tuning run.
# NOTE(review): the key layout looks like a TF Model Garden trainer override
# file -- confirm against the consuming framework.
trainer:
  checkpoint_interval: 50
  max_to_keep: 10
  # Placeholder value: override manually to <finished_step> + 500 when
  # launching the fine-tune (presumably the step at which the previous run
  # finished, plus 500 additional steps -- confirm with the launch script).
  train_steps: 0
  optimizer_config:
    optimizer:
      type: adamw
    learning_rate:
      type: constant
      constant:
        learning_rate: 0.0005
    warmup:
      # Select the `polynomial` sub-config explicitly, the same way
      # `learning_rate` selects `constant` above.
      # NOTE(review): in a oneof-style config an unset `type` presumably
      # leaves the `polynomial` block unused -- verify against the base config.
      type: polynomial
      polynomial:
        warmup_steps: 50
